%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% APPENDIX: COMPARE CENSUS VS SATELLITE DEFORESTATION MEASURES

% "DEFORESTATION IN THE AMAZON:
% A UNIFIED FRAMEWORK FOR ESTIMATION AND POLICY ANALYSIS"

% by Eduardo Souza-Rodrigues

% This version: November 2018

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% (A) PREPARE THE DATA SET

% Load the Data Set - Use Small-Medium farms because it is contains all municipalities
data1 = [data_folder,'\land_use_matlab_variables.txt'];
load(data1);
data = land_use_matlab_variables;
clear data1 land_use_matlab_variables;
       
% Name of the variables
Code        = data(:,1);      % Municipality Identifier (IBGE's code)
TC          = data(:,2);      % Transportation cost to the port
Alt         = data(:,3);      % Altitude
Temp        = data(:,4);      % Temperature
Rain        = data(:,5);      % Rain
Slope       = data(:,6);      % Average Slope
Soil        = data(:,7:10);   % Proportion of soil quality = [prop_potenc2 prop_potenc3 prop_potenc4 prop_potenc5]
Pop         = data(:,11);     % Population
Mining      = data(:,12);     % Presence of Mining
Powerplant  = data(:,13);     % Presence of Powerplant
PPNeighbor  = data(:,14);     % Indicator for Powerplant Neighbor
DistPA      = data(:,15);     % Straight Line Distance to nearest Protected Area
Title       = data(:,16);     % Proportion of the Private Area with Land Title
Fines2005   = data(:,17);     % Fines up to 2005
Fines2003   = data(:,18);     % Fines up to 2003
D           = data(:,19:20);  % Straight Line Distances = [dist to port, dist to state capital]
Ibama       = data(:,21);     % Distance to the Nearest Ibama Agency
Pcensus     = data(:,22);     % Share Deforested, Census
Psat        = data(:,23);     % Share Deforested, Satellite
PsatNo      = data(:,24);     % Share Deforested, Satelite (No Forest Regrowth)
Pfarm       = data(:,25);     % Share of Farm Area in Municipal Area
Pclouds     = data(:,26);     % Share of Cloud Area in Municipal Area
PsatTotal   = data(:,27);     % Share of all Satellite Land-Use Areas in Municipal Area

% Number of observations
T  = size(TC,1);    

% Regressors
X = [Psat D Alt Temp Rain Slope Soil DistPA Mining Powerplant PPNeighbor Pop Title Fines2005 Ibama Pclouds ones(T,1)];

% Names
Names_sat = {'Satellite Prop Deforestation','Dist to Port','Dist to Capital',...
            'Altitude','Temperature','Rain','Slope','Soil 2','Soil 3','Soil 4','Soil 5','Dist to PA',...
            'Mining','Powerplant','Power Plant Neighbor','Population','Prop Land Title',...
            'Fines','Dist to Ibama','Prop Clouds','Constant'};

% Dimension of X
dx = size(X,2);

% Vectors That Are Not Needed Anymore (Exogenous Regressors)
clear Alt Temp Rain Slope Soil Pop Mining Powerplant PPNeighbor DistPA Title


%% (B) RUN REGRESSION

% I. Full Sample

% Estimated Coefficient
beta_ols_full = (X'*X)\(X'*Pcensus);

% Residuals
U_full = Pcensus - X*beta_ols_full;

% Robust Variance Matrix
Var_ols_full = T/(T-dx)*inv(X'*X)*(X'*diag(U_full.^2)*X)*inv(X'*X);
   
% Standard Error
se_ols_full = sqrt(diag(Var_ols_full));

% t-statistic
t_ols_full = beta_ols_full./se_ols_full;

% 95%-Confidence Interval
LB_ols_full = beta_ols_full - critical * sqrt(diag(Var_ols_full));
UB_ols_full = beta_ols_full + critical * sqrt(diag(Var_ols_full));
   
% Adjusted-R^2
R2_full  = 1 - (U_full'*U_full/((Pcensus-mean(Pcensus))'*(Pcensus-mean(Pcensus))));
AR2_full = 1 - ((1 - R2_full) * (T - 1) / (T - dx)); 


% II. Restricted Sample

% Select Restricted Sample
index = find((Pfarm > 0.8).*(Pclouds < 0.03).*(PsatTotal <= 1));

% Variables and Sample Size    
Pcensus_restr = Pcensus(index,:);
X_restr       = X(index,:);
T_restr       = size(X_restr,1);

% Estimated Coefficient
beta_ols_restr = (X_restr'*X_restr)\(X_restr'*Pcensus_restr);

% Residuals
U_restr = Pcensus_restr - X_restr*beta_ols_restr;

% Robust Variance Matrix
Var_ols_restr = T_restr/(T_restr-dx)*inv(X_restr'*X_restr)*(X_restr'*diag(U_restr.^2)*X_restr)*inv(X_restr'*X_restr);
   
% Standard Error
se_ols_restr = sqrt(diag(Var_ols_restr));

% t-statistic
t_ols_restr = beta_ols_restr./se_ols_restr;

% 95%-Confidence Interval
LB_ols_restr = beta_ols_restr - critical * sqrt(diag(Var_ols_restr));
UB_ols_restr = beta_ols_restr + critical * sqrt(diag(Var_ols_restr));
   
% Adjusted-R^2
R2_restr  = 1 - (U_restr'*U_restr/((Pcensus_restr-mean(Pcensus_restr))'*(Pcensus_restr-mean(Pcensus_restr))));
AR2_restr = 1 - ((1 - R2_restr) * (T_restr - 1) / (T_restr - dx)); 


%% (C) SHOW ESTIMATED RESULTS

if results_sat == 1
    
    % Show the Estimated Results
    disp('----------------------------------------------------------------------------')
    disp('----------------------------------------------------------------------------')
    disp('REGRESS CENSUS DEFORESTATION ON SATELLITE DEFORESTATION AND COVARIATES')
    table(beta_ols_full,se_ols_full,beta_ols_restr,se_ols_restr,...
        'VariableNames',{'Coeff_FullSample','SE_FullSample','Coeff_RestrictedSample','SE_RestrictedSample'},...
        'RowNames',Names_sat)
    
    table([T;AR2_full],[T_restr;AR2_restr],...
        'VariableNames',{'FullSample','RestrictedSample'},...
        'RowNames',{'Number of Observation','Adjusted R2'})
    disp('----------------------------------------------------------------------------')
    
        
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    